In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Local Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import collections
import os
import re
import warnings

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from pandas_profiling import ProfileReport
from plotly.offline import iplot, init_notebook_mode
from plotly.subplots import make_subplots
from wordcloud import WordCloud, STOPWORDS
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Silence every Python warning for the rest of the session. Note that this
# also hides deprecation notices raised by pandas / seaborn / plotly.
warnings.filterwarnings("ignore")
In [4]:
# Fetch the complete NLTK data collection. This needs network access: in the
# recorded run the hostname lookup failed and the call returned False.
nltk.download('all')
[nltk_data] Error loading all: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
Out[4]:
False
In [5]:
# Directory that holds the dataset. Set the OTT_DATA_DIR environment variable
# to run the notebook on another machine; the default is the original local
# folder, so existing behaviour is unchanged when the variable is not set.
# (On Google Colab the data lived in '/content/drive/MyDrive/Files/'.)
path = os.environ.get('OTT_DATA_DIR', 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\')

# os.path.join supplies the separator when the directory has no trailing one,
# which plain string concatenation silently got wrong.
df_tvshows = pd.read_csv(os.path.join(path, 'otttvshows.csv'))

# Preview the first rows of the raw frame.
df_tvshows.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Snowpiercer 2013 18+ 6.9 94% NaN Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... Action,Drama,Sci-Fi,Thriller United States English Set seven years after the world has become a f... 60.0 tv series 3.0 1 0 0 0 1
1 2 Philadelphia 1993 13+ 8.8 80% NaN Charlie Day,Glenn Howerton,Rob McElhenney,Kait... Comedy United States English The gang, 5 raging alcoholic, narcissists run ... 22.0 tv series 18.0 1 0 0 0 1
2 3 Roma 2018 18+ 8.7 93% NaN Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... Action,Drama,History,Romance,War United Kingdom,United States English In this British historical drama, the turbulen... 52.0 tv series 2.0 1 0 0 0 1
3 4 Amy 2015 18+ 7.0 87% NaN Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... Drama United States English A family drama focused on three generations of... 60.0 tv series 6.0 1 0 1 1 1
4 5 The Young Offenders 2016 NaN 8.0 100% NaN Alex Murphy,Chris Walley,Hilary Rose,Dominic M... Comedy United Kingdom,Ireland English NaN 30.0 tv series 3.0 1 0 0 0 1
In [6]:
# profile = ProfileReport(df_tvshows)
# profile
In [7]:
def data_investigate(df):
    """Print a structural and missing-value report for ``df`` and plot its null map.

    Printed in order: row count, column count, column names, column dtypes,
    the null count of every column that has at least one null, the null
    percentage of every column, and finally a seaborn heatmap of
    ``df.isnull()`` drawn on a new 10x10 matplotlib figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is only read, never modified.

    Returns
    -------
    None
    """
    divider = '**'*25

    # Shape and schema.
    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print(divider)
    print('Colums Names : \n', df.columns)
    print(divider)
    print('Datatype of Columns : \n', df.dtypes)
    print(divider)

    # Absolute null counts, limited to the columns that actually have nulls.
    print('Missing Values : ')
    null_mask = df.isnull()
    null_counts = null_mask.sum()
    print(null_counts[null_counts > 0])
    print(divider)

    # Null share of every column, as a percentage of the row count.
    print('Missing vaules %age wise :\n')
    print((100*(null_counts/len(df.index))))
    print(divider)

    # One heatmap row per record, one heatmap column per frame column.
    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    sns.heatmap(null_mask, yticklabels = False, cbar = False)
    plt.show()
In [8]:
# Baseline report on the freshly loaded frame, before any cleaning.
data_investigate(df_tvshows)
No of Rows :  5432
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                1954
IMDb                556
Rotten Tomatoes    4194
Directors          5158
Cast                486
Genres              323
Country             549
Language            638
Plotline           2493
Runtime            1410
Seasons             679
dtype: int64
**************************************************
Missing vaules %age wise :

ID                  0.000000
Title               0.000000
Year                0.000000
Age                35.972018
IMDb               10.235641
Rotten Tomatoes    77.209131
Directors          94.955817
Cast                8.946981
Genres              5.946244
Country            10.106775
Language           11.745214
Plotline           45.894698
Runtime            25.957290
Kind                0.000000
Seasons            12.500000
Netflix             0.000000
Hulu                0.000000
Prime Video         0.000000
Disney+             0.000000
Type                0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# Cleaning pass over df_tvshows (modified in place).
# Missing values are replaced with the string sentinel "NA" rather than being
# imputed. This turns the numeric columns IMDb, Rotten Tomatoes, Runtime and
# Seasons into object columns, so any later numeric use has to filter out
# "NA" and cast first.

# Age
# Disney+ titles without a rating are set to '13'. The parentheses around the
# comparison are required: `&` binds tighter than `==`, so without them the
# expression is evaluated as (isnull & flag) == 1 rather than as the
# conjunction of two boolean masks.
df_tvshows.loc[df_tvshows['Age'].isnull() & (df_tvshows['Disney+'] == 1), "Age"] = '13'
# Every other title without a rating gets 'NR' (presumably "not rated").
df_tvshows.fillna({'Age' : 'NR'}, inplace = True)
# Normalise the labels: drop the '+' suffix and map 'all' to '0'. The result is
# assigned back to the column because replace(inplace=True) on a column
# selection acts on a temporary object and does not update the frame when
# pandas Copy-on-Write is active.
df_tvshows['Age'] = df_tvshows['Age'].replace({'all': '0', '7+': '7', '13+': '13', '16+': '16', '18+': '18'})

# Rotten Tomatoes
# Strip the '%' sign from the scores that are present and convert them to
# numbers. Rows without a score are left out of the right-hand side, so they
# stay NaN after index alignment and are filled with "NA" below.
df_tvshows['Rotten Tomatoes'] = df_tvshows['Rotten Tomatoes'][df_tvshows['Rotten Tomatoes'].notnull()].str.replace('%', '', regex = False).astype(int)

# Remaining columns with gaps: one fillna call, one sentinel per column.
df_tvshows.fillna({'IMDb' : "NA",
                   'Rotten Tomatoes' : "NA",
                   'Directors' : "NA",
                   'Cast' : "NA",
                   'Genres' : "NA",
                   'Country' : "NA",
                   'Language' : "NA",
                   'Plotline' : "NA",
                   'Runtime' : "NA",
                   'Seasons' : "NA"}, inplace = True)

# Service Provider
# idxmax returns the FIRST column holding the row maximum, so a title that is
# available on several platforms is labelled with only the first of them in
# the order Netflix, Prime Video, Disney+, Hulu.
# NOTE(review): a row whose four flags are all 0 would be labelled 'Netflix'.
df_tvshows['Service Provider'] = df_tvshows.loc[:, ['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)

# Removing Duplicate and Missing Entries
# Drops rows that still contain a null in a column not filled above, then
# exact duplicate rows.
df_tvshows.dropna(how = 'any', inplace = True)
df_tvshows.drop_duplicates(inplace = True)
In [10]:
# Same report after the cleaning cell, to check that no missing values remain.
data_investigate(df_tvshows)
No of Rows :  5432
No of Coloums :  21
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Seasons             object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Seasons             0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
# Preview the first rows of the cleaned frame.
df_tvshows.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Snowpiercer 2013 18 6.9 94 NA Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... Action,Drama,Sci-Fi,Thriller United States ... Set seven years after the world has become a f... 60 tv series 3 1 0 0 0 1 Netflix
1 2 Philadelphia 1993 13 8.8 80 NA Charlie Day,Glenn Howerton,Rob McElhenney,Kait... Comedy United States ... The gang, 5 raging alcoholic, narcissists run ... 22 tv series 18 1 0 0 0 1 Netflix
2 3 Roma 2018 18 8.7 93 NA Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... Action,Drama,History,Romance,War United Kingdom,United States ... In this British historical drama, the turbulen... 52 tv series 2 1 0 0 0 1 Netflix
3 4 Amy 2015 18 7 87 NA Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... Drama United States ... A family drama focused on three generations of... 60 tv series 6 1 0 1 1 1 Netflix
4 5 The Young Offenders 2016 NR 8 100 NA Alex Murphy,Chris Walley,Hilary Rose,Dominic M... Comedy United Kingdom,Ireland ... NA 30 tv series 3 1 0 0 0 1 Netflix

5 rows × 21 columns

In [12]:
# Summary statistics; the recorded output covers only the columns that are
# still numeric after cleaning (ID, Year, the platform flags and Type).
df_tvshows.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 5432.000000 5432.000000 5432.000000 5432.000000 5432.000000 5432.000000 5432.0
mean 2716.500000 2010.668446 0.341311 0.293999 0.403351 0.033689 1.0
std 1568.227662 11.726176 0.474193 0.455633 0.490615 0.180445 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 1.0
25% 1358.750000 2009.000000 0.000000 0.000000 0.000000 0.000000 1.0
50% 2716.500000 2014.000000 0.000000 0.000000 0.000000 0.000000 1.0
75% 4074.250000 2017.000000 1.000000 1.000000 1.000000 0.000000 1.0
max 5432.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 1.0
In [13]:
# Pairwise correlation of the numeric columns only. The frame also holds
# object columns (titles, "NA"-filled ratings, ...); pandas >= 2.0 no longer
# drops those silently in corr() and raises instead, so select the numeric
# columns explicitly. On older pandas the result is the same as before.
df_tvshows.select_dtypes(include = 'number').corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.031346 -0.646330 0.034293 0.441264 0.195409 NaN
Year -0.031346 1.000000 0.222316 -0.065807 -0.198675 -0.022741 NaN
Netflix -0.646330 0.222316 1.000000 -0.366515 -0.515086 -0.119344 NaN
Hulu 0.034293 -0.065807 -0.366515 1.000000 -0.377374 -0.075701 NaN
Prime Video 0.441264 -0.198675 -0.515086 -0.377374 1.000000 -0.151442 NaN
Disney+ 0.195409 -0.022741 -0.119344 -0.075701 -0.151442 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_tvshows.sort_values('Year', ascending = True)
# df_tvshows.sort_values('IMDb', ascending = False)
In [15]:
# df_tvshows.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_otttvshows.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_tvshows = pd.read_csv(path + 'updated_otttvshows.csv')
 
# udf_tvshows
In [16]:
# df_netflix_tvshows = df_tvshows.loc[(df_tvshows['Netflix'] > 0)]
# df_hulu_tvshows = df_tvshows.loc[(df_tvshows['Hulu'] > 0)]
# df_prime_video_tvshows = df_tvshows.loc[(df_tvshows['Prime Video'] > 0)]
# df_disney_tvshows = df_tvshows.loc[(df_tvshows['Disney+'] > 0)]
In [17]:
# Titles carried by exactly one platform: that platform's flag is 1 and the
# flags of the other three platforms are 0.
df_netflix_only_tvshows = df_tvshows.loc[
    df_tvshows['Netflix'].eq(1) & df_tvshows['Hulu'].eq(0)
    & df_tvshows['Prime Video'].eq(0) & df_tvshows['Disney+'].eq(0)
]
df_hulu_only_tvshows = df_tvshows.loc[
    df_tvshows['Netflix'].eq(0) & df_tvshows['Hulu'].eq(1)
    & df_tvshows['Prime Video'].eq(0) & df_tvshows['Disney+'].eq(0)
]
df_prime_video_only_tvshows = df_tvshows.loc[
    df_tvshows['Netflix'].eq(0) & df_tvshows['Hulu'].eq(0)
    & df_tvshows['Prime Video'].eq(1) & df_tvshows['Disney+'].eq(0)
]
df_disney_only_tvshows = df_tvshows.loc[
    df_tvshows['Netflix'].eq(0) & df_tvshows['Hulu'].eq(0)
    & df_tvshows['Prime Video'].eq(0) & df_tvshows['Disney+'].eq(1)
]
In [18]:
# Work on a copy so that dropping the rows without a runtime (next cell)
# leaves df_tvshows untouched.
df_tvshows_runtimes = df_tvshows.copy()
In [19]:
# Discard the rows whose runtime is the "NA" placeholder, then store the
# remaining runtimes as integers so they can be sorted and aggregated.
df_tvshows_runtimes.drop(index = df_tvshows_runtimes.index[df_tvshows_runtimes['Runtime'] == "NA"], inplace = True)
df_tvshows_runtimes['Runtime'] = df_tvshows_runtimes['Runtime'].astype('int')
In [20]:
# One frame per streaming platform, holding the titles with a known runtime
# whose flag for that platform is 1 (a title can appear in several frames).
netflix_runtimes_tvshows = df_tvshows_runtimes[df_tvshows_runtimes['Netflix'].eq(1)]
hulu_runtimes_tvshows = df_tvshows_runtimes[df_tvshows_runtimes['Hulu'].eq(1)]
prime_video_runtimes_tvshows = df_tvshows_runtimes[df_tvshows_runtimes['Prime Video'].eq(1)]
disney_runtimes_tvshows = df_tvshows_runtimes[df_tvshows_runtimes['Disney+'].eq(1)]
In [21]:
# Separate copy of the runtime-filtered frame (its use is not visible in this
# part of the notebook).
df_tvshows_runtimes_group = df_tvshows_runtimes.copy()
In [22]:
# New frame with an extra 'Screentime' column: the runtime divided by 60
# (minutes to hours), rounded to two decimals.
df_tvshows_screentimes = df_tvshows_runtimes.assign(
    Screentime = (df_tvshows_runtimes['Runtime'] / 60).round(2)
)
In [23]:
# One screentime frame per streaming platform, holding the titles whose flag
# for that platform is 1 (a title can appear in several frames).
netflix_screentimes_tvshows = df_tvshows_screentimes[df_tvshows_screentimes['Netflix'].eq(1)]
hulu_screentimes_tvshows = df_tvshows_screentimes[df_tvshows_screentimes['Hulu'].eq(1)]
prime_video_screentimes_tvshows = df_tvshows_screentimes[df_tvshows_screentimes['Prime Video'].eq(1)]
disney_screentimes_tvshows = df_tvshows_screentimes[df_tvshows_screentimes['Disney+'].eq(1)]
In [24]:
# Correlation heatmap of the numeric columns of the runtime frame.
# Only numeric columns are correlated: the frame still contains object columns
# and pandas >= 2.0 raises on them inside corr().
corr = df_tvshows_runtimes.select_dtypes(include = 'number').corr()

# A single figure/axes pair. The extra plt.figure() call that used to precede
# this produced the empty "<Figure size 720x720 with 0 Axes>" output.
fig, ax = plt.subplots(figsize = (10, 8))

# Annotated heatmap drawn on that axes. The labels are passed to seaborn so
# they sit at the cell centres; setting ticks at integer positions placed
# them on the cell edges instead.
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f",
            xticklabels = corr.columns, yticklabels = corr.columns, ax = ax)

# plt.show() is enough to render the figure; a second fig.show() is redundant.
plt.show()
<Figure size 720x720 with 0 Axes>
In [25]:
# Rank every title by runtime, longest first, and renumber the rows from 0 so
# that position 0 is the longest-running title.
df_runtimes_high_tvshows = (
    df_tvshows_runtimes
    .sort_values(by = 'Runtime', ascending = False)
    .reset_index(drop = True)
)

print('\nTV Shows with Highest Ever Runtime  are : \n')
df_runtimes_high_tvshows.head(5)
TV Shows with Highest Ever Runtime  are : 

Out[25]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 367 Colorado 1940 0 8.3 NA NA Raymond Burr,Barbara Carrera,Richard Chamberla... Action,Adventure,Drama,Romance,Western United States ... Part of Mary's soul has been ripped from her b... 1256 tv series 1 0 0 1 0 1 Prime Video
1 3843 Baseball 1994 7 9.2 NA NA John Chancellor,Daniel Okrent,Ossie Davis,Paul... Documentary,History,Sport United States ... An assassin named Al Simmons is double-crossed... 1140 tv series 1 0 0 1 0 1 Prime Video
2 601 The Vietnam War 2017 18 9.1 96 NA Peter Coyote,Huy Duc,James Willbanks,Duong Van... Documentary,History,War NA ... NA 990 tv series 1 1 0 0 0 1 Netflix
3 937 The Time in Between 2013 7 8.3 NA NA Adriana Ugarte,Mari Carmen Sánchez,Tristán Ull... Adventure,Drama,History,Mystery,Romance Spain ... Vampire Knight tells the story of Yuki Cross. ... 853 tv series 1 1 0 0 0 1 Netflix
4 3835 Too Old to Die Young 2019 18 7.4 70 NA Miles Teller,Augusto Aguilera,Cristina Rodlo,N... Crime,Drama,Thriller United States ... It's 1953, and Sidney Chambers is vicar of Gra... 758 tv series 1 0 0 1 0 1 Prime Video

5 rows × 21 columns

In [26]:
# Bar chart of the 15 longest-running titles across all platforms; bar colour
# encodes the runtime as well.
fig = px.bar(x = df_runtimes_high_tvshows['Runtime'].head(15),
             y = df_runtimes_high_tvshows['Title'].head(15),
             color = df_runtimes_high_tvshows['Runtime'].head(15),
             title = 'TV Shows with Highest Runtime in Minutes : All Platforms',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [27]:
# Rank every title by runtime, shortest first, and renumber the rows from 0 so
# that position 0 is the shortest-running title.
df_runtimes_low_tvshows = (
    df_tvshows_runtimes
    .sort_values(by = 'Runtime', ascending = True)
    .reset_index(drop = True)
)

print('\nTV Shows with Lowest Ever Runtime  are : \n')
df_runtimes_low_tvshows.head(5)
TV Shows with Lowest Ever Runtime  are : 

Out[27]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 4685 DoongDoong 2020 NR 6.8 NA NA Seunghee Nam,Sunyoung Park Animation South Korea ... NA 1 tv series NA 0 0 1 0 1 Prime Video
1 4934 Grandma's Cats (Are Trying To Kill Her!) 2015 7 NA NA NA Malcolm Campbell,Louie Granda Animation,Comedy United States ... NA 2 tv series NA 0 0 1 0 1 Prime Video
2 2025 Larva 2011 7 7.3 NA NA Beom-gi Hong Animation,Short,Comedy,Family South Korea ... NA 2 tv series 5 1 0 0 0 1 Netflix
3 5154 The Family Blend! 2016 7 NA NA NA Sebastian Foxworth Animation United States ... NA 2 tv series NA 0 0 1 0 1 Prime Video
4 4541 Originalos 2010 0 7 NA NA NA Animation Denmark ... NA 3 tv series 1 0 0 1 0 1 Prime Video

5 rows × 21 columns

In [28]:
# Bar chart of the 15 shortest-running titles across all platforms; bar colour
# encodes the runtime as well.
fig = px.bar(x = df_runtimes_low_tvshows['Runtime'].head(15),
             y = df_runtimes_low_tvshows['Title'].head(15),
             color = df_runtimes_low_tvshows['Runtime'].head(15),
             title = 'TV Shows with Lowest Runtime in Minutes : All Platforms',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [29]:
# Text summary of the runtime column: the number of distinct runtimes, the
# distinct values in descending order, and the title at position 0 of the
# high / low rankings together with the overall maximum / minimum runtime.
# The whitespace inside the triple-quoted f-string is part of the output.
print(f'''
      Total '{df_tvshows_runtimes['Runtime'].unique().shape[0]}' unique Runtime s were Given, They were Like this,\n
      
{df_tvshows_runtimes.sort_values(by = 'Runtime', ascending = False)['Runtime'].unique()}\n
 
      The Highest Ever Runtime Ever Any TV Show Got is '{df_runtimes_high_tvshows['Title'][0]}' : '{df_runtimes_high_tvshows['Runtime'].max()}'\n
 
      The Lowest Ever Runtime Ever Any TV Show Got is '{df_runtimes_low_tvshows['Title'][0]}' : '{df_runtimes_low_tvshows['Runtime'].min()}'\n
      ''')
      Total '257' unique Runtime s were Given, They were Like this,

      
[1256 1140  990  853  758  720  663  659  650  629  624  594  588  573
  566  542  540  530  525  510  492  480  463  461  460  458  452  440
  436  426  409  403  400  397  396  386  385  379  375  373  370  363
  360  358  357  355  354  352  351  350  347  346  339  338  336  334
  333  331  328  327  325  321  314  313  306  303  302  300  297  296
  292  288  287  284  282  281  280  277  274  270  269  267  264  260
  258  255  254  253  248  246  245  242  241  240  236  235  231  229
  228  227  225  223  222  220  217  216  213  212  211  210  207  205
  202  200  197  195  192  191  190  188  187  186  185  183  181  180
  177  176  175  174  171  170  169  164  163  159  154  150  148  142
  141  140  138  137  132  131  122  120  118  117  115  114  113  111
  110  107  106  105  104  103  102  101  100   97   96   95   94   93
   92   91   90   89   88   87   86   85   84   83   82   81   80   78
   76   75   74   73   72   71   70   69   67   66   65   64   63   62
   61   60   59   58   57   56   55   54   53   52   51   50   49   48
   47   46   45   44   43   42   41   40   39   38   37   36   35   34
   33   32   31   30   29   28   27   26   25   24   23   22   21   20
   19   18   17   16   15   14   13   12   11   10    9    8    7    6
    5    4    3    2    1]

 
      The Highest Ever Runtime Ever Any TV Show Got is 'Colorado' : '1256'

 
      The Lowest Ever Runtime Ever Any TV Show Got is 'DoongDoong' : '1'

      
In [30]:
# Titles available on Netflix, taken from the two runtime rankings and
# renumbered from 0 so that position 0 is the longest / shortest title there.
netflix_runtimes_high_tvshows = df_runtimes_high_tvshows[df_runtimes_high_tvshows['Netflix'].eq(1)].reset_index(drop = True)
netflix_runtimes_low_tvshows = df_runtimes_low_tvshows[df_runtimes_low_tvshows['Netflix'].eq(1)].reset_index(drop = True)

netflix_runtimes_high_tvshows.head(5)
Out[30]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 601 The Vietnam War 2017 18 9.1 96 NA Peter Coyote,Huy Duc,James Willbanks,Duong Van... Documentary,History,War NA ... NA 990 tv series 1 1 0 0 0 1 Netflix
1 937 The Time in Between 2013 7 8.3 NA NA Adriana Ugarte,Mari Carmen Sánchez,Tristán Ull... Adventure,Drama,History,Mystery,Romance Spain ... Vampire Knight tells the story of Yuki Cross. ... 853 tv series 1 1 0 0 0 1 Netflix
2 1091 World War II In HD Colour 2009 18 8.7 NA NA Robert Powell,Swaylee Loughnane,Mamoru Shigemi... Documentary,History,War United Kingdom ... NA 663 tv series 1 1 0 0 0 1 Netflix
3 727 The Staircase 2005 18 7.9 94 NA Michael Peterson,David Rudolf,Ron Guerette,Mar... Documentary,Crime,Drama France ... NA 629 tv series 1 1 0 0 0 1 Netflix
4 756 The Innocence Files 2020 18 8 100 NA Peter Neufeld,Barry Scheck,Michael West,Gary W... Documentary,Crime United States ... NA 573 tv series 1 1 0 0 0 1 Netflix

5 rows × 21 columns

In [31]:
# Bar chart of the 15 longest-running titles available on Netflix.
fig = px.bar(x = netflix_runtimes_high_tvshows['Runtime'].head(15),
             y = netflix_runtimes_high_tvshows['Title'].head(15),
             color = netflix_runtimes_high_tvshows['Runtime'].head(15),
             title = 'TV Shows with Highest Runtime in Minutes : Netflix',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [32]:
# Bar chart of the 15 shortest-running titles available on Netflix.
fig = px.bar(x = netflix_runtimes_low_tvshows['Runtime'].head(15),
             y = netflix_runtimes_low_tvshows['Title'].head(15),
             color = netflix_runtimes_low_tvshows['Runtime'].head(15),
             title = 'TV Shows with Lowest Runtime in Minutes : Netflix',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
# Titles available on Hulu, taken from the two runtime rankings and
# renumbered from 0 so that position 0 is the longest / shortest title there.
hulu_runtimes_high_tvshows = df_runtimes_high_tvshows[df_runtimes_high_tvshows['Hulu'].eq(1)].reset_index(drop = True)
hulu_runtimes_low_tvshows = df_runtimes_low_tvshows[df_runtimes_low_tvshows['Hulu'].eq(1)].reset_index(drop = True)

hulu_runtimes_high_tvshows.head(5)
Out[33]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 2495 Roots 2016 18 8.4 96 NA Robert Reed,John Amos,Louis Gossett Jr.,Lynda ... Biography,Drama,History,War United States ... Escaping Planet Shlorp before the Asteroid hit... 588 tv series 1 0 1 0 0 1 Hulu
1 3062 Criminal Justice 2008 18 8.5 NA NA Riz Ahmed,Bill Camp,Payman Maadi,John Turturro... Crime,Drama,Mystery United States ... NA 525 tv series 1 0 1 0 0 1 Hulu
2 3157 Bleak House 1985 7 8.3 NA NA Anna Maxwell Martin,Denis Lawson,Carey Mulliga... Crime,Drama United Kingdom,United States ... The acerbic, hilarious Claire Bennett becomes ... 510 tv series 1 0 1 1 0 1 Prime Video
3 2553 Bleak House 2005 7 8.3 NA NA Anna Maxwell Martin,Denis Lawson,Carey Mulliga... Crime,Drama United Kingdom,United States ... NA 510 tv series 1 0 1 0 0 1 Hulu
4 2441 The Looming Tower 2018 18 8 88 NA Jeff Daniels,Tahar Rahim,Wrenn Schmidt,Bill Ca... Drama,History United States ... Set in England at the end of the War of the Ro... 492 tv series 1 0 1 0 0 1 Hulu

5 rows × 21 columns

In [34]:
# Bar chart of the 15 longest-running titles available on Hulu.
fig = px.bar(x = hulu_runtimes_high_tvshows['Runtime'].head(15),
             y = hulu_runtimes_high_tvshows['Title'].head(15),
             color = hulu_runtimes_high_tvshows['Runtime'].head(15),
             title = 'TV Shows with Highest Runtime in Minutes : Hulu',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [35]:
# Bar chart of the 15 shortest-running titles available on Hulu.
fig = px.bar(x = hulu_runtimes_low_tvshows['Runtime'].head(15),
             y = hulu_runtimes_low_tvshows['Title'].head(15),
             color = hulu_runtimes_low_tvshows['Runtime'].head(15),
             title = 'TV Shows with Lowest Runtime in Minutes : Hulu',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
# Titles available on Prime Video, taken from the two runtime rankings and
# renumbered from 0 so that position 0 is the longest / shortest title there.
prime_video_runtimes_high_tvshows = df_runtimes_high_tvshows[df_runtimes_high_tvshows['Prime Video'].eq(1)].reset_index(drop = True)
prime_video_runtimes_low_tvshows = df_runtimes_low_tvshows[df_runtimes_low_tvshows['Prime Video'].eq(1)].reset_index(drop = True)

prime_video_runtimes_high_tvshows.head(5)
Out[36]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 367 Colorado 1940 0 8.3 NA NA Raymond Burr,Barbara Carrera,Richard Chamberla... Action,Adventure,Drama,Romance,Western United States ... Part of Mary's soul has been ripped from her b... 1256 tv series 1 0 0 1 0 1 Prime Video
1 3843 Baseball 1994 7 9.2 NA NA John Chancellor,Daniel Okrent,Ossie Davis,Paul... Documentary,History,Sport United States ... An assassin named Al Simmons is double-crossed... 1140 tv series 1 0 0 1 0 1 Prime Video
2 3835 Too Old to Die Young 2019 18 7.4 70 NA Miles Teller,Augusto Aguilera,Cristina Rodlo,N... Crime,Drama,Thriller United States ... It's 1953, and Sidney Chambers is vicar of Gra... 758 tv series 1 0 0 1 0 1 Prime Video
3 3989 Mankind: The Story of All of Us 2012 7 7.8 83 NA Josh Brolin,Richard Machowicz,James Meigs,Geor... Documentary,History United States ... The tongues of London high society gossips beg... 720 tv series 1 0 0 1 0 1 Prime Video
4 3850 Brideshead Revisited 1981 NR 8.5 80 NA Jeremy Irons,Diana Quick,Roger Milner,Phoebe N... Drama,Romance United Kingdom ... NA 659 tv series 1 0 0 1 0 1 Prime Video

5 rows × 21 columns

In [37]:
# Bar chart of the 15 longest-running titles available on Prime Video.
fig = px.bar(x = prime_video_runtimes_high_tvshows['Runtime'].head(15),
             y = prime_video_runtimes_high_tvshows['Title'].head(15),
             color = prime_video_runtimes_high_tvshows['Runtime'].head(15),
             title = 'TV Shows with Highest Runtime in Minutes : Prime Video',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [38]:
# Bar chart of the 15 shortest-running titles available on Prime Video.
fig = px.bar(x = prime_video_runtimes_low_tvshows['Runtime'].head(15),
             y = prime_video_runtimes_low_tvshows['Title'].head(15),
             color = prime_video_runtimes_low_tvshows['Runtime'].head(15),
             title = 'TV Shows with Lowest Runtime in Minutes : Prime Video',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
# Titles available on Disney+, taken from the two runtime rankings and
# renumbered from 0 so that position 0 is the longest / shortest title there.
disney_runtimes_high_tvshows = df_runtimes_high_tvshows[df_runtimes_high_tvshows['Disney+'].eq(1)].reset_index(drop = True)
disney_runtimes_low_tvshows = df_runtimes_low_tvshows[df_runtimes_low_tvshows['Disney+'].eq(1)].reset_index(drop = True)

disney_runtimes_high_tvshows.head(5)
Out[39]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 487 Stargirl 2020 7 7.3 70 NA Brec Bassinger,Yvette Monreal,Anjelika Washing... Action,Adventure,Crime,Drama,Fantasy,Sci-Fi United States ... NA 566 tv series 2 0 0 0 1 1 Disney+
1 5430 Wild Russia 2018 7 8.4 NA NA Christian Brückner,Jason Hildebrandt,Paterson ... Documentary Germany ... NA 360 tv series 2 0 0 0 1 1 Disney+
2 484 Invincible 2006 7 8.8 72 Angelina Jolie Steven Yeun,Sandra Oh,J.K. Simmons,Zazie Beetz... Animation,Action,Adventure,Drama,Fantasy,Horro... United States ... In February of 1952, one of the worst storms t... 137 tv series 1 0 0 0 1 1 Disney+
3 5337 Dog Whisperer 2004 0 8 NA NA Cesar Millan,Daddy,Paul Dini,Ilusion Millan,Sh... Family,Reality-TV United States ... An animated comedy adventure series that follo... 110 tv series 9 0 0 0 1 1 Disney+
4 486 Ruby Bridges 1998 7 7.2 83 Euzhan Palcy Penelope Ann Miller,Kevin Pollak,Michael Beach... Adventure,Comedy,Drama,Family,Fantasy United States ... Alonzo Hawk is a mean-spirited property develo... 96 tv series NA 0 0 0 1 1 Disney+

5 rows × 21 columns

In [40]:
# Bar chart of the 15 longest-running titles available on Disney+.
fig = px.bar(x = disney_runtimes_high_tvshows['Runtime'].head(15),
             y = disney_runtimes_high_tvshows['Title'].head(15),
             color = disney_runtimes_high_tvshows['Runtime'].head(15),
             title = 'TV Shows with Highest Runtime in Minutes : Disney+',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [41]:
# Bar chart of the 15 shortest-running titles available on Disney+.
fig = px.bar(x = disney_runtimes_low_tvshows['Runtime'].head(15),
             y = disney_runtimes_low_tvshows['Title'].head(15),
             color = disney_runtimes_low_tvshows['Runtime'].head(15),
             title = 'TV Shows with Lowest Runtime in Minutes : Disney+',
             labels = {'x' : 'Runtime : In Minutes', 'y' : 'TV Shows'},
             color_continuous_scale = 'Teal_r')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [42]:
# Text summary of the runtime extremes, overall and per platform: the title at
# position 0 of each high / low ranking with that ranking's max / min runtime.
# Label 0 exists because every ranking was re-indexed from 0; an empty
# platform frame would raise a KeyError on ['Title'][0].
# The whitespace inside the triple-quoted f-string is part of the output.
print(f'''
      The TV Show with Highest Runtime  Ever Got is '{df_runtimes_high_tvshows['Title'][0]}' : '{df_runtimes_high_tvshows['Runtime'].max()}'\n
      The TV Show with Lowest Runtime  Ever Got is '{df_runtimes_low_tvshows['Title'][0]}' : '{df_runtimes_low_tvshows['Runtime'].min()}'\n
      
      The TV Show with Highest Runtime  on 'Netflix' is '{netflix_runtimes_high_tvshows['Title'][0]}' : '{netflix_runtimes_high_tvshows['Runtime'].max()}'\n
      The TV Show with Lowest Runtime  on 'Netflix' is '{netflix_runtimes_low_tvshows['Title'][0]}' : '{netflix_runtimes_low_tvshows['Runtime'].min()}'\n
      
      The TV Show with Highest Runtime  on 'Hulu' is '{hulu_runtimes_high_tvshows['Title'][0]}' : '{hulu_runtimes_high_tvshows['Runtime'].max()}'\n
      The TV Show with Lowest Runtime  on 'Hulu' is '{hulu_runtimes_low_tvshows['Title'][0]}' : '{hulu_runtimes_low_tvshows['Runtime'].min()}'\n
      
      The TV Show with Highest Runtime  on 'Prime Video' is '{prime_video_runtimes_high_tvshows['Title'][0]}' : '{prime_video_runtimes_high_tvshows['Runtime'].max()}'\n
      The TV Show with Lowest Runtime  on 'Prime Video' is '{prime_video_runtimes_low_tvshows['Title'][0]}' : '{prime_video_runtimes_low_tvshows['Runtime'].min()}'\n
      
      The TV Show with Highest Runtime  on 'Disney+' is '{disney_runtimes_high_tvshows['Title'][0]}' : '{disney_runtimes_high_tvshows['Runtime'].max()}'\n
      The TV Show with Lowest Runtime  on 'Disney+' is '{disney_runtimes_low_tvshows['Title'][0]}' : '{disney_runtimes_low_tvshows['Runtime'].min()}'\n 
      ''')
      The TV Show with Highest Runtime  Ever Got is 'Colorado' : '1256'

      The TV Show with Lowest Runtime  Ever Got is 'DoongDoong' : '1'

      
      The TV Show with Highest Runtime  on 'Netflix' is 'The Vietnam War' : '990'

      The TV Show with Lowest Runtime  on 'Netflix' is 'Larva' : '2'

      
      The TV Show with Highest Runtime  on 'Hulu' is 'Roots' : '588'

      The TV Show with Lowest Runtime  on 'Hulu' is 'Mighty Magiswords' : '3'

      
      The TV Show with Highest Runtime  on 'Prime Video' is 'Colorado' : '1256'

      The TV Show with Lowest Runtime  on 'Prime Video' is 'DoongDoong' : '1'

      
      The TV Show with Highest Runtime  on 'Disney+' is 'Stargirl' : '566'

      The TV Show with Lowest Runtime  on 'Disney+' is 'Pixar in Real Life' : '4'
 
      
In [43]:
# Text summary of the mean 'Runtime' (rounded to 2 decimals) across all
# platforms and for each platform's runtime frame.
print(f'''
      Accross All Platforms the Average Runtime  is '{round(df_tvshows_runtimes['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Netflix' is '{round(netflix_runtimes_tvshows['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Hulu' is '{round(hulu_runtimes_tvshows['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Prime Video' is '{round(prime_video_runtimes_tvshows['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Disney+' is '{round(disney_runtimes_tvshows['Runtime'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Runtime  is '55.19'

      The Average Runtime  on 'Netflix' is '55.9'

      The Average Runtime  on 'Hulu' is '45.53'

      The Average Runtime  on 'Prime Video' is '62.92'

      The Average Runtime  on 'Disney+' is '38.24'
 
      
In [44]:
# Distribution of TV-show runtimes: density histogram with KDE (left panel)
# and box plot (right panel).
f, ax = plt.subplots(1, 2, figsize = (20, 5))

# sns.distplot is deprecated and scheduled for removal; histplot with
# stat = 'density' and kde = True is its documented replacement and keeps the
# density-scaled y axis that distplot produced.
sns.histplot(x = df_tvshows_runtimes['Runtime'], bins = 20, kde = True, stat = 'density', ax = ax[0])

# Name the axis explicitly: seaborn versions differ on whether the first
# positional argument is `x` or `data`, which changes how the box is drawn.
sns.boxplot(x = df_tvshows_runtimes['Runtime'], ax = ax[1])

plt.show()
In [45]:
# One wide figure holding the overlaid runtime histograms of every platform.
plt.figure(figsize = (20, 5))
plt.title('Runtime s Per Platform')

# Histogram + KDE of the first 100 'Runtime' values of each platform frame.
# The drawing order is kept because the legend below is given labels only,
# which matplotlib pairs with the plotted artists in order.
for platform_runtimes, shade in (
    (prime_video_runtimes_tvshows, 'lightblue'),
    (netflix_runtimes_tvshows, 'red'),
    (hulu_runtimes_tvshows, 'lightgreen'),
    (disney_runtimes_tvshows, 'darkblue'),
):
    sns.histplot(platform_runtimes['Runtime'].iloc[:100], color = shade, legend = True, kde = True)

# Legend entries, listed in drawing order.
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [46]:
def round_val(data):
    """Round ``data`` with the built-in ``round``; return None for NaN.

    A value counts as missing when its string form is exactly ``'nan'``.
    """
    is_missing = str(data) == 'nan'
    return None if is_missing else round(data)
        
def round_fix(data):
    """Map a runtime (minutes) to the upper edge of its runtime group.

    Groups are 50 minutes wide up to 1350 (0-50 -> 50, 51-100 -> 100, ...,
    1301-1350 -> 1350); everything above 1350 and up to 2000 falls into a
    single catch-all group labelled 2000.

    Unlike the former ``data in range(...)`` chain, fractional runtimes such
    as 22.5 are bucketed as well instead of silently yielding None. Results
    for integer-valued inputs are unchanged.

    Parameters
    ----------
    data : int or float
        Runtime to classify.

    Returns
    -------
    int or None
        Upper edge of the group, or None when ``data`` is not a real number,
        is NaN, is negative, or exceeds 2000.
    """
    # Local imports: neither module is imported at the top of the notebook.
    import math
    import numbers

    bucket_width = 50
    last_regular_edge = 1350   # groups are 50 wide up to this edge
    catch_all_edge = 2000      # one wide group covers (1350, 2000]

    if not isinstance(data, numbers.Real) or math.isnan(data):
        return None
    if data < 0 or data > catch_all_edge:
        return None
    if data > last_regular_edge:
        return catch_all_edge
    # Ceil to the next multiple of the width; 0 belongs to the first group.
    return max(bucket_width, math.ceil(data / bucket_width) * bucket_width)
In [47]:
# Label every show with its runtime group (upper edge of its bucket, see round_fix).
df_tvshows_runtimes_group['Runtime Group'] = df_tvshows_runtimes['Runtime'].map(round_fix)

# Shows per runtime group, ordered from the largest group label down.
_runtime_group_counts = (
    df_tvshows_runtimes_group['Runtime Group']
    .value_counts()
    .sort_index(ascending = False)
)
runtimes_values = _runtime_group_counts.tolist()
runtimes_index = _runtime_group_counts.index
 
# runtimes_values, runtimes_index
In [48]:
# Number of titles in each runtime group.
runtimes_group_count = (
    df_tvshows_runtimes_group
    .groupby('Runtime Group')['Title']
    .count()
)

# Per-platform sums of the Netflix / Hulu / Prime Video / Disney+ columns in each group.
runtimes_group_tvshows = (
    df_tvshows_runtimes_group
    .groupby('Runtime Group')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']]
    .sum()
)

# One table: group label, total count, per-platform sums; biggest groups first.
runtimes_group_data_tvshows = (
    pd.concat([runtimes_group_count, runtimes_group_tvshows], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'TV Shows Count'})
    .sort_values(by = 'TV Shows Count', ascending = False)
)
In [49]:
# All platforms combined: runtime groups ordered by how many TV shows they hold (largest first).
runtimes_group_data_tvshows.sort_values('TV Shows Count', ascending = False)
Out[49]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 50 2666 970 945 847 139
1 100 1083 400 316 418 17
2 150 73 22 12 41 2
3 200 46 12 6 31 0
4 250 36 15 4 18 0
5 300 31 12 4 16 0
7 400 29 14 2 12 1
6 350 23 5 6 14 0
8 450 7 3 3 1 0
9 500 7 3 3 1 0
10 550 6 1 3 3 0
11 600 4 1 1 1 1
12 650 3 1 0 2 0
13 700 2 1 0 1 0
14 750 1 0 0 1 0
15 800 1 0 0 1 0
16 900 1 1 0 0 0
17 1000 1 1 0 0 0
18 1150 1 0 0 1 0
19 1300 1 0 0 1 0
In [50]:
# Same table ordered by the runtime-group label itself, longest runtimes first.
runtimes_group_data_tvshows.sort_values('Runtime Group', ascending = False)
Out[50]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
19 1300 1 0 0 1 0
18 1150 1 0 0 1 0
17 1000 1 1 0 0 0
16 900 1 1 0 0 0
15 800 1 0 0 1 0
14 750 1 0 0 1 0
13 700 2 1 0 1 0
12 650 3 1 0 2 0
11 600 4 1 1 1 1
10 550 6 1 3 3 0
9 500 7 3 3 1 0
8 450 7 3 3 1 0
7 400 29 14 2 12 1
6 350 23 5 6 14 0
5 300 31 12 4 16 0
4 250 36 15 4 18 0
3 200 46 12 6 31 0
2 150 73 22 12 41 2
1 100 1083 400 316 418 17
0 50 2666 970 945 847 139
In [51]:
# Bar chart over all platforms: runtime group on x, number of TV shows on y,
# bars coloured by the runtime-group value.
fig = px.bar(
    x = runtimes_group_data_tvshows['Runtime Group'],
    y = runtimes_group_data_tvshows['TV Shows Count'],
    color = runtimes_group_data_tvshows['Runtime Group'],
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'TV Shows Count', 'x' : 'Runtime : In Minutes'},
    title = 'TV Shows with Group Runtime in Minutes : All Platforms',
).update_layout(plot_bgcolor = 'white')

fig.show()
In [52]:
# Pie chart of the TV-show share of the first ten rows of the runtime-group
# table (the table is sorted by 'TV Shows Count', descending, where it is built).
#
# Columns are referenced by name so that the 10-row slice is what gets plotted.
# Previously names/values/color were passed as full-length Series, which
# plotly express uses as-is, so the slice had no effect and every group was drawn.
fig = px.pie(runtimes_group_data_tvshows.head(10),
             names = 'Runtime Group',
             values = 'TV Shows Count',
             color = 'TV Shows Count',
             color_discrete_sequence = px.colors.sequential.Teal)

# Show percentage and group label on each slice; the title is set on the trace.
fig.update_traces(textinfo = 'percent+label',
                  title = 'TV Shows Count based on Runtime Group')
fig.show()
In [53]:
# Runtime groups ranked from most to fewest TV shows (all platforms combined),
# re-indexed from 0 so that label 0 is the most populated group.
df_runtimes_group_high_tvshows = (
    runtimes_group_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = False)
    .reset_index(drop = True)
)

df_runtimes_group_high_tvshows.head()
Out[53]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 50 2666 970 945 847 139
1 100 1083 400 316 418 17
2 150 73 22 12 41 2
3 200 46 12 6 31 0
4 250 36 15 4 18 0
In [54]:
# Runtime groups ranked from fewest to most TV shows (all platforms combined),
# re-indexed from 0 so that label 0 is one of the least populated groups.
df_runtimes_group_low_tvshows = (
    runtimes_group_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = True)
    .reset_index(drop = True)
)

df_runtimes_group_low_tvshows.head()
Out[54]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1300 1 0 0 1 0
1 1000 1 1 0 0 0
2 900 1 1 0 0 0
3 800 1 0 0 1 0
4 750 1 0 0 1 0
In [55]:
# Text summary of the runtime groups over all platforms: number of titles with
# a runtime, number of distinct groups, the group labels ordered by show count,
# and the groups at index label 0 of the high/low ranking frames next to the
# max/min 'TV Shows Count'.
print(f'''
      Total '{df_tvshows_runtimes['Runtime'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see TV Shows from Total '{runtimes_group_data_tvshows['Runtime Group'].unique().shape[0]}' Runtime Group, They were Like this, \n
 
      {runtimes_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Runtime Group'].unique()} etc. \n
 
      The Runtime Group with Highest TV Shows Count have '{runtimes_group_data_tvshows['TV Shows Count'].max()}' TV Shows Available is '{df_runtimes_group_high_tvshows['Runtime Group'][0]}', &\n
      The Runtime Group with Lowest TV Shows Count have '{runtimes_group_data_tvshows['TV Shows Count'].min()}' TV Shows Available is '{df_runtimes_group_low_tvshows['Runtime Group'][0]}'
      ''')
      Total '4022' Titles are available on All Platforms, out of which

      You Can Choose to see TV Shows from Total '20' Runtime Group, They were Like this, 

 
      [  50  100  150  200  250  300  400  350  450  500  550  600  650  700
  750  800  900 1000 1150 1300] etc. 

 
      The Runtime Group with Highest TV Shows Count have '2666' TV Shows Available is '50', &

      The Runtime Group with Lowest TV Shows Count have '1' TV Shows Available is '1300'
      
In [56]:
# Runtime groups holding at least one Netflix show, most populated first;
# only the group label and the Netflix count are kept.
netflix_runtimes_group_tvshows = (
    runtimes_group_data_tvshows
    .loc[runtimes_group_data_tvshows['Netflix'].ne(0)]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Hulu', 'Prime Video', 'Disney+', 'TV Shows Count'])
)

# Every runtime group (zero-count groups included) ordered by Netflix count,
# once descending and once ascending. Both start from the same ranked frame.
netflix_runtimes_group_high_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
)
netflix_runtimes_group_low_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Netflix', ascending = True)
    .reset_index(drop = True)
)

netflix_runtimes_group_high_tvshows.head()
Out[56]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 50 2666 970 945 847 139
1 100 1083 400 316 418 17
2 150 73 22 12 41 2
3 250 36 15 4 18 0
4 400 29 14 2 12 1
In [57]:
# Runtime groups holding at least one Hulu show, most populated first;
# only the group label and the Hulu count are kept.
hulu_runtimes_group_tvshows = (
    runtimes_group_data_tvshows
    .loc[runtimes_group_data_tvshows['Hulu'].ne(0)]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Prime Video', 'Disney+', 'TV Shows Count'])
)

# Every runtime group (zero-count groups included) ordered by Hulu count,
# once descending and once ascending. Both start from the same ranked frame.
hulu_runtimes_group_high_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
)
hulu_runtimes_group_low_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Hulu', ascending = True)
    .reset_index(drop = True)
)

hulu_runtimes_group_high_tvshows.head()
Out[57]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 50 2666 970 945 847 139
1 100 1083 400 316 418 17
2 150 73 22 12 41 2
3 200 46 12 6 31 0
4 350 23 5 6 14 0
In [58]:
# Runtime groups holding at least one Prime Video show, most populated first;
# only the group label and the Prime Video count are kept.
prime_video_runtimes_group_tvshows = (
    runtimes_group_data_tvshows
    .loc[runtimes_group_data_tvshows['Prime Video'].ne(0)]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Disney+', 'TV Shows Count'])
)

# Every runtime group (zero-count groups included) ordered by Prime Video
# count, once descending and once ascending. Both start from the same ranked frame.
prime_video_runtimes_group_high_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
)
prime_video_runtimes_group_low_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Prime Video', ascending = True)
    .reset_index(drop = True)
)

prime_video_runtimes_group_high_tvshows.head()
Out[58]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 50 2666 970 945 847 139
1 100 1083 400 316 418 17
2 150 73 22 12 41 2
3 200 46 12 6 31 0
4 250 36 15 4 18 0
In [59]:
# Runtime groups holding at least one Disney+ show, most populated first;
# only the group label and the Disney+ count are kept.
disney_runtimes_group_tvshows = (
    runtimes_group_data_tvshows
    .loc[runtimes_group_data_tvshows['Disney+'].ne(0)]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'TV Shows Count'])
)

# Every runtime group (zero-count groups included) ordered by Disney+ count,
# once descending and once ascending. Both start from the same ranked frame.
disney_runtimes_group_high_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
)
disney_runtimes_group_low_tvshows = (
    df_runtimes_group_high_tvshows
    .sort_values(by = 'Disney+', ascending = True)
    .reset_index(drop = True)
)

disney_runtimes_group_high_tvshows.head()
Out[59]:
Runtime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 50 2666 970 945 847 139
1 100 1083 400 316 418 17
2 150 73 22 12 41 2
3 400 29 14 2 12 1
4 600 4 1 1 1 1
In [60]:
# Text summary of the most / least populated runtime group, overall and per
# platform. Each line pairs the 'Runtime Group' at index label 0 of a ranked
# frame with the max/min of the matching count column.
# NOTE(review): the per-platform "low" frames are sorted from the unfiltered
# ranking, so groups with a count of 0 are included and the "Lowest" group
# shown is one of several tied at 0 - confirm this is the intended reading.
print(f'''
      The Runtime Group with Highest TV Shows Count Ever Got is '{df_runtimes_group_high_tvshows['Runtime Group'][0]}' : '{df_runtimes_group_high_tvshows['TV Shows Count'].max()}'\n
      The Runtime Group with Lowest TV Shows Count Ever Got is '{df_runtimes_group_low_tvshows['Runtime Group'][0]}' : '{df_runtimes_group_low_tvshows['TV Shows Count'].min()}'\n
      
      The Runtime Group with Highest TV Shows Count on 'Netflix' is '{netflix_runtimes_group_high_tvshows['Runtime Group'][0]}' : '{netflix_runtimes_group_high_tvshows['Netflix'].max()}'\n
      The Runtime Group with Lowest TV Shows Count on 'Netflix' is '{netflix_runtimes_group_low_tvshows['Runtime Group'][0]}' : '{netflix_runtimes_group_low_tvshows['Netflix'].min()}'\n
      
      The Runtime Group with Highest TV Shows Count on 'Hulu' is '{hulu_runtimes_group_high_tvshows['Runtime Group'][0]}' : '{hulu_runtimes_group_high_tvshows['Hulu'].max()}'\n
      The Runtime Group with Lowest TV Shows Count on 'Hulu' is '{hulu_runtimes_group_low_tvshows['Runtime Group'][0]}' : '{hulu_runtimes_group_low_tvshows['Hulu'].min()}'\n
      
      The Runtime Group with Highest TV Shows Count on 'Prime Video' is '{prime_video_runtimes_group_high_tvshows['Runtime Group'][0]}' : '{prime_video_runtimes_group_high_tvshows['Prime Video'].max()}'\n
      The Runtime Group with Lowest TV Shows Count on 'Prime Video' is '{prime_video_runtimes_group_low_tvshows['Runtime Group'][0]}' : '{prime_video_runtimes_group_low_tvshows['Prime Video'].min()}'\n
      
      The Runtime Group with Highest TV Shows Count on 'Disney+' is '{disney_runtimes_group_high_tvshows['Runtime Group'][0]}' : '{disney_runtimes_group_high_tvshows['Disney+'].max()}'\n
      The Runtime Group with Lowest TV Shows Count on 'Disney+' is '{disney_runtimes_group_low_tvshows['Runtime Group'][0]}' : '{disney_runtimes_group_low_tvshows['Disney+'].min()}'\n 
      ''')
      The Runtime Group with Highest TV Shows Count Ever Got is '50' : '2666'

      The Runtime Group with Lowest TV Shows Count Ever Got is '1300' : '1'

      
      The Runtime Group with Highest TV Shows Count on 'Netflix' is '50' : '970'

      The Runtime Group with Lowest TV Shows Count on 'Netflix' is '1300' : '0'

      
      The Runtime Group with Highest TV Shows Count on 'Hulu' is '50' : '945'

      The Runtime Group with Lowest TV Shows Count on 'Hulu' is '1300' : '0'

      
      The Runtime Group with Highest TV Shows Count on 'Prime Video' is '50' : '847'

      The Runtime Group with Lowest TV Shows Count on 'Prime Video' is '1000' : '0'

      
      The Runtime Group with Highest TV Shows Count on 'Disney+' is '50' : '139'

      The Runtime Group with Lowest TV Shows Count on 'Disney+' is '500' : '0'
 
      
In [61]:
# 2 x 2 grid, one panel per platform: the first 10 rows of each platform's
# runtime-group frame, runtime group on x and that platform's show count on y.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

# Platform names double as the count-column name and as the panel title.
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4 = [
    sns.barplot(
        x = platform_frame['Runtime Group'].iloc[:10],
        y = platform_frame[platform].iloc[:10],
        palette = shade,
        ax = panel,
    )
    for platform_frame, platform, shade, panel in zip(
        (netflix_runtimes_group_tvshows,
         hulu_runtimes_group_tvshows,
         prime_video_runtimes_group_tvshows,
         disney_runtimes_group_tvshows),
        labels,
        ('Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r'),
        axes.flat,   # row-major: [0, 0], [0, 1], [1, 0], [1, 1]
    )
]

for panel_ax, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    panel_ax.title.set_text(panel_title)

plt.show()
In [62]:
# One line per platform on shared axes: show count (y) against runtime group (x).
plt.figure(figsize = (20, 5))

sns.lineplot(data = runtimes_group_data_tvshows, x = 'Runtime Group', y = 'Netflix', color = 'red')
sns.lineplot(data = runtimes_group_data_tvshows, x = 'Runtime Group', y = 'Hulu', color = 'lightgreen')
sns.lineplot(data = runtimes_group_data_tvshows, x = 'Runtime Group', y = 'Prime Video', color = 'lightblue')
sns.lineplot(data = runtimes_group_data_tvshows, x = 'Runtime Group', y = 'Disney+', color = 'darkblue')

# Shared axis captions replace the per-column labels seaborn sets.
plt.xlabel('Runtime Group', fontsize = 15)
plt.ylabel('TV Shows Count', fontsize = 15)
plt.show()
In [63]:
# Text summary of how many distinct runtime groups exist overall and in each
# platform's runtime-group frame (those frames only keep groups with a
# non-zero count for the platform).
print(f'''
      Accross All Platforms Total Count of Runtime Group is '{runtimes_group_data_tvshows['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Netflix' is '{netflix_runtimes_group_tvshows['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Hulu' is '{hulu_runtimes_group_tvshows['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Prime Video' is '{prime_video_runtimes_group_tvshows['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Disney+' is '{disney_runtimes_group_tvshows['Runtime Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Runtime Group is '20'

      Total Count of Runtime Group on 'Netflix' is '16'

      Total Count of Runtime Group on 'Hulu' is '12'

      Total Count of Runtime Group on 'Prime Video' is '18'

      Total Count of Runtime Group on 'Disney+' is '5'
 
      
In [64]:
# 2 x 2 grid of line plots, one panel per platform, with the platform's show
# count on x and the runtime group on y.
# NOTE(review): the axes are swapped relative to the combined line plot
# (runtime group on x there) - confirm this orientation is intended.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1 = sns.lineplot(data = runtimes_group_data_tvshows, x = 'Netflix', y = 'Runtime Group', color = 'red', ax = axes[0, 0])
h_ru_ax2 = sns.lineplot(data = runtimes_group_data_tvshows, x = 'Hulu', y = 'Runtime Group', color = 'lightgreen', ax = axes[0, 1])
p_ru_ax3 = sns.lineplot(data = runtimes_group_data_tvshows, x = 'Prime Video', y = 'Runtime Group', color = 'lightblue', ax = axes[1, 0])
d_ru_ax4 = sns.lineplot(data = runtimes_group_data_tvshows, x = 'Disney+', y = 'Runtime Group', color = 'darkblue', ax = axes[1, 1])

for panel_ax, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    panel_ax.title.set_text(panel_title)

plt.show()
In [65]:
# 2 x 2 grid, one panel per platform, all drawn from the first 10 rows of the
# combined runtime-group table: runtime group on x, platform show count on y.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

# Platform names double as the count-column name and as the panel title.
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4 = [
    sns.barplot(
        x = runtimes_group_data_tvshows['Runtime Group'].iloc[:10],
        y = runtimes_group_data_tvshows[platform].iloc[:10],
        palette = shade,
        ax = panel,
    )
    for platform, shade, panel in zip(
        labels,
        ('Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r'),
        axes.flat,   # row-major: [0, 0], [0, 1], [1, 0], [1, 1]
    )
]

for panel_ax, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    panel_ax.title.set_text(panel_title)

plt.show()
In [66]:
# TV shows ordered by 'Screentime', largest first, re-indexed from 0 so that
# label 0 is the show with the highest screentime.
df_screentimes_high_tvshows = (
    df_tvshows_screentimes
    .sort_values(by = 'Screentime', ascending = False)
    .reset_index(drop = True)
)

print('\nTV Shows with Highest Ever Screentime  are : \n')
df_screentimes_high_tvshows.head()
TV Shows with Highest Ever Screentime  are : 

Out[66]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 367 Colorado 1940 0 8.3 NA NA Raymond Burr,Barbara Carrera,Richard Chamberla... Action,Adventure,Drama,Romance,Western United States ... 1256 tv series 1 0 0 1 0 1 Prime Video 20.93
1 3843 Baseball 1994 7 9.2 NA NA John Chancellor,Daniel Okrent,Ossie Davis,Paul... Documentary,History,Sport United States ... 1140 tv series 1 0 0 1 0 1 Prime Video 19.00
2 601 The Vietnam War 2017 18 9.1 96 NA Peter Coyote,Huy Duc,James Willbanks,Duong Van... Documentary,History,War NA ... 990 tv series 1 1 0 0 0 1 Netflix 16.50
3 937 The Time in Between 2013 7 8.3 NA NA Adriana Ugarte,Mari Carmen Sánchez,Tristán Ull... Adventure,Drama,History,Mystery,Romance Spain ... 853 tv series 1 1 0 0 0 1 Netflix 14.22
4 3835 Too Old to Die Young 2019 18 7.4 70 NA Miles Teller,Augusto Aguilera,Cristina Rodlo,N... Crime,Drama,Thriller United States ... 758 tv series 1 0 0 1 0 1 Prime Video 12.63

5 rows × 22 columns

In [67]:
# Bar chart of the 15 shows with the highest screentime over all platforms:
# titles on the y axis, screentime on the x axis and as the colour scale.
fig = px.bar(
    x = df_screentimes_high_tvshows['Screentime'].iloc[:15],
    y = df_screentimes_high_tvshows['Title'].iloc[:15],
    color = df_screentimes_high_tvshows['Screentime'].iloc[:15],
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'TV Shows', 'x' : 'Screentime : In Hours'},
    title = 'TV Shows with Highest Screentime in Hours : All Platforms',
).update_layout(plot_bgcolor = 'white')

fig.show()
In [68]:
# TV shows ordered by 'Screentime', smallest first, re-indexed from 0 so that
# label 0 is the show with the lowest screentime.
df_screentimes_low_tvshows = (
    df_tvshows_screentimes
    .sort_values(by = 'Screentime', ascending = True)
    .reset_index(drop = True)
)

print('\nTV Shows with Lowest Ever Screentime  are : \n')
df_screentimes_low_tvshows.head()
TV Shows with Lowest Ever Screentime  are : 

Out[68]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 4685 DoongDoong 2020 NR 6.8 NA NA Seunghee Nam,Sunyoung Park Animation South Korea ... 1 tv series NA 0 0 1 0 1 Prime Video 0.02
1 4934 Grandma's Cats (Are Trying To Kill Her!) 2015 7 NA NA NA Malcolm Campbell,Louie Granda Animation,Comedy United States ... 2 tv series NA 0 0 1 0 1 Prime Video 0.03
2 2025 Larva 2011 7 7.3 NA NA Beom-gi Hong Animation,Short,Comedy,Family South Korea ... 2 tv series 5 1 0 0 0 1 Netflix 0.03
3 5154 The Family Blend! 2016 7 NA NA NA Sebastian Foxworth Animation United States ... 2 tv series NA 0 0 1 0 1 Prime Video 0.03
4 4541 Originalos 2010 0 7 NA NA NA Animation Denmark ... 3 tv series 1 0 0 1 0 1 Prime Video 0.05

5 rows × 22 columns

In [69]:
# Bar chart of the 15 shows with the lowest screentime over all platforms:
# titles on the y axis, screentime on the x axis and as the colour scale.
fig = px.bar(
    x = df_screentimes_low_tvshows['Screentime'].iloc[:15],
    y = df_screentimes_low_tvshows['Title'].iloc[:15],
    color = df_screentimes_low_tvshows['Screentime'].iloc[:15],
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'TV Shows', 'x' : 'Screentime : In Hours'},
    title = 'TV Shows with Lowest Screentime in Hours : All Platforms',
).update_layout(plot_bgcolor = 'white')

fig.show()
In [70]:
# Text summary of the 'Screentime' column: number of distinct values, the
# distinct values in descending order, and the titles at index label 0 of the
# high/low screentime frames next to the overall max/min screentime.
print(f'''
      Total '{df_tvshows_screentimes['Screentime'].unique().shape[0]}' unique Screentime s were Given, They were Like this,\n
      
{df_tvshows_screentimes.sort_values(by = 'Screentime', ascending = False)['Screentime'].unique()}\n
 
      The Highest Ever Screentime Ever Any TV Show Got is '{df_screentimes_high_tvshows['Title'][0]}' : '{df_screentimes_high_tvshows['Screentime'].max()}'\n
 
      The Lowest Ever Screentime Ever Any TV Show Got is '{df_screentimes_low_tvshows['Title'][0]}' : '{df_screentimes_low_tvshows['Screentime'].min()}'\n
      ''')
      Total '257' unique Screentime s were Given, They were Like this,

      
[2.093e+01 1.900e+01 1.650e+01 1.422e+01 1.263e+01 1.200e+01 1.105e+01
 1.098e+01 1.083e+01 1.048e+01 1.040e+01 9.900e+00 9.800e+00 9.550e+00
 9.430e+00 9.030e+00 9.000e+00 8.830e+00 8.750e+00 8.500e+00 8.200e+00
 8.000e+00 7.720e+00 7.680e+00 7.670e+00 7.630e+00 7.530e+00 7.330e+00
 7.270e+00 7.100e+00 6.820e+00 6.720e+00 6.670e+00 6.620e+00 6.600e+00
 6.430e+00 6.420e+00 6.320e+00 6.250e+00 6.220e+00 6.170e+00 6.050e+00
 6.000e+00 5.970e+00 5.950e+00 5.920e+00 5.900e+00 5.870e+00 5.850e+00
 5.830e+00 5.780e+00 5.770e+00 5.650e+00 5.630e+00 5.600e+00 5.570e+00
 5.550e+00 5.520e+00 5.470e+00 5.450e+00 5.420e+00 5.350e+00 5.230e+00
 5.220e+00 5.100e+00 5.050e+00 5.030e+00 5.000e+00 4.950e+00 4.930e+00
 4.870e+00 4.800e+00 4.780e+00 4.730e+00 4.700e+00 4.680e+00 4.670e+00
 4.620e+00 4.570e+00 4.500e+00 4.480e+00 4.450e+00 4.400e+00 4.330e+00
 4.300e+00 4.250e+00 4.230e+00 4.220e+00 4.130e+00 4.100e+00 4.080e+00
 4.030e+00 4.020e+00 4.000e+00 3.930e+00 3.920e+00 3.850e+00 3.820e+00
 3.800e+00 3.780e+00 3.750e+00 3.720e+00 3.700e+00 3.670e+00 3.620e+00
 3.600e+00 3.550e+00 3.530e+00 3.520e+00 3.500e+00 3.450e+00 3.420e+00
 3.370e+00 3.330e+00 3.280e+00 3.250e+00 3.200e+00 3.180e+00 3.170e+00
 3.130e+00 3.120e+00 3.100e+00 3.080e+00 3.050e+00 3.020e+00 3.000e+00
 2.950e+00 2.930e+00 2.920e+00 2.900e+00 2.850e+00 2.830e+00 2.820e+00
 2.730e+00 2.720e+00 2.650e+00 2.570e+00 2.500e+00 2.470e+00 2.370e+00
 2.350e+00 2.330e+00 2.300e+00 2.280e+00 2.200e+00 2.180e+00 2.030e+00
 2.000e+00 1.970e+00 1.950e+00 1.920e+00 1.900e+00 1.880e+00 1.850e+00
 1.830e+00 1.780e+00 1.770e+00 1.750e+00 1.730e+00 1.720e+00 1.700e+00
 1.680e+00 1.670e+00 1.620e+00 1.600e+00 1.580e+00 1.570e+00 1.550e+00
 1.530e+00 1.520e+00 1.500e+00 1.480e+00 1.470e+00 1.450e+00 1.430e+00
 1.420e+00 1.400e+00 1.380e+00 1.370e+00 1.350e+00 1.330e+00 1.300e+00
 1.270e+00 1.250e+00 1.230e+00 1.220e+00 1.200e+00 1.180e+00 1.170e+00
 1.150e+00 1.120e+00 1.100e+00 1.080e+00 1.070e+00 1.050e+00 1.030e+00
 1.020e+00 1.000e+00 9.800e-01 9.700e-01 9.500e-01 9.300e-01 9.200e-01
 9.000e-01 8.800e-01 8.700e-01 8.500e-01 8.300e-01 8.200e-01 8.000e-01
 7.800e-01 7.700e-01 7.500e-01 7.300e-01 7.200e-01 7.000e-01 6.800e-01
 6.700e-01 6.500e-01 6.300e-01 6.200e-01 6.000e-01 5.800e-01 5.700e-01
 5.500e-01 5.300e-01 5.200e-01 5.000e-01 4.800e-01 4.700e-01 4.500e-01
 4.300e-01 4.200e-01 4.000e-01 3.800e-01 3.700e-01 3.500e-01 3.300e-01
 3.200e-01 3.000e-01 2.800e-01 2.700e-01 2.500e-01 2.300e-01 2.200e-01
 2.000e-01 1.800e-01 1.700e-01 1.500e-01 1.300e-01 1.200e-01 1.000e-01
 8.000e-02 7.000e-02 5.000e-02 3.000e-02 2.000e-02]

 
      The Highest Ever Screentime Ever Any TV Show Got is 'Colorado' : '20.93'

 
      The Lowest Ever Screentime Ever Any TV Show Got is 'DoongDoong' : '0.02'

      
In [71]:
# Netflix rows (flag column == 1) of the screentime rankings, re-indexed from 0.
# Row order is inherited from the already sorted source frames.
netflix_screentimes_high_tvshows = (
    df_screentimes_high_tvshows[df_screentimes_high_tvshows['Netflix'].eq(1)]
    .reset_index(drop = True)
)
netflix_screentimes_low_tvshows = (
    df_screentimes_low_tvshows[df_screentimes_low_tvshows['Netflix'].eq(1)]
    .reset_index(drop = True)
)

netflix_screentimes_high_tvshows.head()
Out[71]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 601 The Vietnam War 2017 18 9.1 96 NA Peter Coyote,Huy Duc,James Willbanks,Duong Van... Documentary,History,War NA ... 990 tv series 1 1 0 0 0 1 Netflix 16.50
1 937 The Time in Between 2013 7 8.3 NA NA Adriana Ugarte,Mari Carmen Sánchez,Tristán Ull... Adventure,Drama,History,Mystery,Romance Spain ... 853 tv series 1 1 0 0 0 1 Netflix 14.22
2 1091 World War II In HD Colour 2009 18 8.7 NA NA Robert Powell,Swaylee Loughnane,Mamoru Shigemi... Documentary,History,War United Kingdom ... 663 tv series 1 1 0 0 0 1 Netflix 11.05
3 727 The Staircase 2005 18 7.9 94 NA Michael Peterson,David Rudolf,Ron Guerette,Mar... Documentary,Crime,Drama France ... 629 tv series 1 1 0 0 0 1 Netflix 10.48
4 756 The Innocence Files 2020 18 8 100 NA Peter Neufeld,Barry Scheck,Michael West,Gary W... Documentary,Crime United States ... 573 tv series 1 1 0 0 0 1 Netflix 9.55

5 rows × 22 columns

In [72]:
# Bar chart of the 15 Netflix shows with the highest screentime:
# titles on the y axis, screentime on the x axis and as the colour scale.
fig = px.bar(
    x = netflix_screentimes_high_tvshows['Screentime'].iloc[:15],
    y = netflix_screentimes_high_tvshows['Title'].iloc[:15],
    color = netflix_screentimes_high_tvshows['Screentime'].iloc[:15],
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'TV Shows', 'x' : 'Screentime : In Hours'},
    title = 'TV Shows with Highest Screentime in Hours : Netflix',
).update_layout(plot_bgcolor = 'white')

fig.show()
In [73]:
# Bar chart of the 15 Netflix shows with the lowest screentime:
# titles on the y axis, screentime on the x axis and as the colour scale.
fig = px.bar(
    x = netflix_screentimes_low_tvshows['Screentime'].iloc[:15],
    y = netflix_screentimes_low_tvshows['Title'].iloc[:15],
    color = netflix_screentimes_low_tvshows['Screentime'].iloc[:15],
    color_continuous_scale = 'Teal_r',
    labels = {'y' : 'TV Shows', 'x' : 'Screentime : In Hours'},
    title = 'TV Shows with Lowest Screentime in Hours : Netflix',
).update_layout(plot_bgcolor = 'white')

fig.show()
In [74]:
# Hulu subsets of the high / low screentime frames (rows flagged Hulu == 1),
# re-indexed from 0 so that row 0 is the first Hulu title in each ordering.
hulu_screentimes_high_tvshows = (
    df_screentimes_high_tvshows
    .loc[df_screentimes_high_tvshows['Hulu'] == 1]
    .reset_index(drop = True)
)

hulu_screentimes_low_tvshows = (
    df_screentimes_low_tvshows
    .loc[df_screentimes_low_tvshows['Hulu'] == 1]
    .reset_index(drop = True)
)

hulu_screentimes_high_tvshows.head(5)
Out[74]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 2495 Roots 2016 18 8.4 96 NA Robert Reed,John Amos,Louis Gossett Jr.,Lynda ... Biography,Drama,History,War United States ... 588 tv series 1 0 1 0 0 1 Hulu 9.80
1 3062 Criminal Justice 2008 18 8.5 NA NA Riz Ahmed,Bill Camp,Payman Maadi,John Turturro... Crime,Drama,Mystery United States ... 525 tv series 1 0 1 0 0 1 Hulu 8.75
2 3157 Bleak House 1985 7 8.3 NA NA Anna Maxwell Martin,Denis Lawson,Carey Mulliga... Crime,Drama United Kingdom,United States ... 510 tv series 1 0 1 1 0 1 Prime Video 8.50
3 2553 Bleak House 2005 7 8.3 NA NA Anna Maxwell Martin,Denis Lawson,Carey Mulliga... Crime,Drama United Kingdom,United States ... 510 tv series 1 0 1 0 0 1 Hulu 8.50
4 2441 The Looming Tower 2018 18 8 88 NA Jeff Daniels,Tahar Rahim,Wrenn Schmidt,Bill Ca... Drama,History United States ... 492 tv series 1 0 1 0 0 1 Hulu 8.20

5 rows × 22 columns

In [75]:
# First 15 rows of the Hulu "high screentime" frame: titles on y, hours on x.
hulu_high_top15 = hulu_screentimes_high_tvshows.head(15)

fig = px.bar(
    x = hulu_high_top15['Screentime'],
    y = hulu_high_top15['Title'],
    color = hulu_high_top15['Screentime'],
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows'},
    title = 'TV Shows with Highest Screentime in Hours : Hulu',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [76]:
# First 15 rows of the Hulu "low screentime" frame: titles on y, hours on x.
hulu_low_top15 = hulu_screentimes_low_tvshows.head(15)

fig = px.bar(
    x = hulu_low_top15['Screentime'],
    y = hulu_low_top15['Title'],
    color = hulu_low_top15['Screentime'],
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows'},
    title = 'TV Shows with Lowest Screentime in Hours : Hulu',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [77]:
# Prime Video subsets of the high / low screentime frames (rows flagged
# Prime Video == 1), re-indexed from 0.
prime_video_screentimes_high_tvshows = (
    df_screentimes_high_tvshows
    .loc[df_screentimes_high_tvshows['Prime Video'] == 1]
    .reset_index(drop = True)
)

prime_video_screentimes_low_tvshows = (
    df_screentimes_low_tvshows
    .loc[df_screentimes_low_tvshows['Prime Video'] == 1]
    .reset_index(drop = True)
)

prime_video_screentimes_high_tvshows.head(5)
Out[77]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 367 Colorado 1940 0 8.3 NA NA Raymond Burr,Barbara Carrera,Richard Chamberla... Action,Adventure,Drama,Romance,Western United States ... 1256 tv series 1 0 0 1 0 1 Prime Video 20.93
1 3843 Baseball 1994 7 9.2 NA NA John Chancellor,Daniel Okrent,Ossie Davis,Paul... Documentary,History,Sport United States ... 1140 tv series 1 0 0 1 0 1 Prime Video 19.00
2 3835 Too Old to Die Young 2019 18 7.4 70 NA Miles Teller,Augusto Aguilera,Cristina Rodlo,N... Crime,Drama,Thriller United States ... 758 tv series 1 0 0 1 0 1 Prime Video 12.63
3 3989 Mankind: The Story of All of Us 2012 7 7.8 83 NA Josh Brolin,Richard Machowicz,James Meigs,Geor... Documentary,History United States ... 720 tv series 1 0 0 1 0 1 Prime Video 12.00
4 3850 Brideshead Revisited 1981 NR 8.5 80 NA Jeremy Irons,Diana Quick,Roger Milner,Phoebe N... Drama,Romance United Kingdom ... 659 tv series 1 0 0 1 0 1 Prime Video 10.98

5 rows × 22 columns

In [78]:
# First 15 rows of the Prime Video "high screentime" frame: titles on y, hours on x.
prime_video_high_top15 = prime_video_screentimes_high_tvshows.head(15)

fig = px.bar(
    x = prime_video_high_top15['Screentime'],
    y = prime_video_high_top15['Title'],
    color = prime_video_high_top15['Screentime'],
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows'},
    title = 'TV Shows with Highest Screentime in Hours : Prime Video',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [79]:
# First 15 rows of the Prime Video "low screentime" frame: titles on y, hours on x.
prime_video_low_top15 = prime_video_screentimes_low_tvshows.head(15)

fig = px.bar(
    x = prime_video_low_top15['Screentime'],
    y = prime_video_low_top15['Title'],
    color = prime_video_low_top15['Screentime'],
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows'},
    title = 'TV Shows with Lowest Screentime in Hours : Prime Video',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [80]:
# Disney+ subsets of the high / low screentime frames (rows flagged
# Disney+ == 1), re-indexed from 0.
disney_screentimes_high_tvshows = (
    df_screentimes_high_tvshows
    .loc[df_screentimes_high_tvshows['Disney+'] == 1]
    .reset_index(drop = True)
)

disney_screentimes_low_tvshows = (
    df_screentimes_low_tvshows
    .loc[df_screentimes_low_tvshows['Disney+'] == 1]
    .reset_index(drop = True)
)

disney_screentimes_high_tvshows.head(5)
Out[80]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 487 Stargirl 2020 7 7.3 70 NA Brec Bassinger,Yvette Monreal,Anjelika Washing... Action,Adventure,Crime,Drama,Fantasy,Sci-Fi United States ... 566 tv series 2 0 0 0 1 1 Disney+ 9.43
1 5430 Wild Russia 2018 7 8.4 NA NA Christian Brückner,Jason Hildebrandt,Paterson ... Documentary Germany ... 360 tv series 2 0 0 0 1 1 Disney+ 6.00
2 484 Invincible 2006 7 8.8 72 Angelina Jolie Steven Yeun,Sandra Oh,J.K. Simmons,Zazie Beetz... Animation,Action,Adventure,Drama,Fantasy,Horro... United States ... 137 tv series 1 0 0 0 1 1 Disney+ 2.28
3 5337 Dog Whisperer 2004 0 8 NA NA Cesar Millan,Daddy,Paul Dini,Ilusion Millan,Sh... Family,Reality-TV United States ... 110 tv series 9 0 0 0 1 1 Disney+ 1.83
4 486 Ruby Bridges 1998 7 7.2 83 Euzhan Palcy Penelope Ann Miller,Kevin Pollak,Michael Beach... Adventure,Comedy,Drama,Family,Fantasy United States ... 96 tv series NA 0 0 0 1 1 Disney+ 1.60

5 rows × 22 columns

In [81]:
# First 15 rows of the Disney+ "high screentime" frame: titles on y, hours on x.
disney_high_top15 = disney_screentimes_high_tvshows.head(15)

fig = px.bar(
    x = disney_high_top15['Screentime'],
    y = disney_high_top15['Title'],
    color = disney_high_top15['Screentime'],
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows'},
    title = 'TV Shows with Highest Screentime in Hours : Disney+',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [82]:
# First 15 rows of the Disney+ "low screentime" frame: titles on y, hours on x.
disney_low_top15 = disney_screentimes_low_tvshows.head(15)

fig = px.bar(
    x = disney_low_top15['Screentime'],
    y = disney_low_top15['Title'],
    color = disney_low_top15['Screentime'],
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows'},
    title = 'TV Shows with Lowest Screentime in Hours : Disney+',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [83]:
# Text summary of the screentime extremes, overall and per platform.
# Each line pairs the title in row 0 of a frame with that frame's max()/min()
# 'Screentime', so the pairing is only right if row 0 holds the extreme value.
# NOTE(review): presumably the *_high_* frames are sorted descending and the
# *_low_* frames ascending by 'Screentime' -- the sort is not in this section; confirm.
print(f'''
      The TV Show with Highest Screentime  Ever Got is '{df_screentimes_high_tvshows['Title'][0]}' : '{df_screentimes_high_tvshows['Screentime'].max()}'\n
      The TV Show with Lowest Screentime  Ever Got is '{df_screentimes_low_tvshows['Title'][0]}' : '{df_screentimes_low_tvshows['Screentime'].min()}'\n
      
      The TV Show with Highest Screentime  on 'Netflix' is '{netflix_screentimes_high_tvshows['Title'][0]}' : '{netflix_screentimes_high_tvshows['Screentime'].max()}'\n
      The TV Show with Lowest Screentime  on 'Netflix' is '{netflix_screentimes_low_tvshows['Title'][0]}' : '{netflix_screentimes_low_tvshows['Screentime'].min()}'\n
      
      The TV Show with Highest Screentime  on 'Hulu' is '{hulu_screentimes_high_tvshows['Title'][0]}' : '{hulu_screentimes_high_tvshows['Screentime'].max()}'\n
      The TV Show with Lowest Screentime  on 'Hulu' is '{hulu_screentimes_low_tvshows['Title'][0]}' : '{hulu_screentimes_low_tvshows['Screentime'].min()}'\n
      
      The TV Show with Highest Screentime  on 'Prime Video' is '{prime_video_screentimes_high_tvshows['Title'][0]}' : '{prime_video_screentimes_high_tvshows['Screentime'].max()}'\n
      The TV Show with Lowest Screentime  on 'Prime Video' is '{prime_video_screentimes_low_tvshows['Title'][0]}' : '{prime_video_screentimes_low_tvshows['Screentime'].min()}'\n
      
      The TV Show with Highest Screentime  on 'Disney+' is '{disney_screentimes_high_tvshows['Title'][0]}' : '{disney_screentimes_high_tvshows['Screentime'].max()}'\n
      The TV Show with Lowest Screentime  on 'Disney+' is '{disney_screentimes_low_tvshows['Title'][0]}' : '{disney_screentimes_low_tvshows['Screentime'].min()}'\n 
      ''')
      The TV Show with Highest Screentime  Ever Got is 'Colorado' : '20.93'

      The TV Show with Lowest Screentime  Ever Got is 'DoongDoong' : '0.02'

      
      The TV Show with Highest Screentime  on 'Netflix' is 'The Vietnam War' : '16.5'

      The TV Show with Lowest Screentime  on 'Netflix' is 'Larva' : '0.03'

      
      The TV Show with Highest Screentime  on 'Hulu' is 'Roots' : '9.8'

      The TV Show with Lowest Screentime  on 'Hulu' is 'Mighty Magiswords' : '0.05'

      
      The TV Show with Highest Screentime  on 'Prime Video' is 'Colorado' : '20.93'

      The TV Show with Lowest Screentime  on 'Prime Video' is 'DoongDoong' : '0.02'

      
      The TV Show with Highest Screentime  on 'Disney+' is 'Stargirl' : '9.43'

      The TV Show with Lowest Screentime  on 'Disney+' is 'Pixar in Real Life' : '0.07'
 
      
In [84]:
# Mean of the 'Screentime' column, overall and per platform, rounded to 2 decimals.
# NOTE(review): the per-platform *_screentimes_tvshows frames are not built in
# this section -- presumably they come from an earlier cell; confirm they exist
# on a fresh Restart & Run All.
print(f'''
      Accross All Platforms the Average Screentime  is '{round(df_tvshows_screentimes['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Netflix' is '{round(netflix_screentimes_tvshows['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Hulu' is '{round(hulu_screentimes_tvshows['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Prime Video' is '{round(prime_video_screentimes_tvshows['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Disney+' is '{round(disney_screentimes_tvshows['Screentime'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Screentime  is '0.92'

      The Average Screentime  on 'Netflix' is '0.93'

      The Average Screentime  on 'Hulu' is '0.76'

      The Average Screentime  on 'Prime Video' is '1.05'

      The Average Screentime  on 'Disney+' is '0.64'
 
      
In [85]:
# Distribution of 'Screentime' across all platforms:
# left panel = histogram with KDE overlay, right panel = box plot.
f, ax = plt.subplots(1, 2, figsize = (20, 5))

# `sns.distplot` is deprecated. `histplot` with stat = 'density' draws the same
# density-normalised histogram plus KDE curve that distplot produced.
sns.histplot(x = df_tvshows_screentimes['Screentime'], bins = 20, kde = True, stat = 'density', ax = ax[0])

# Pass the Series as `x` explicitly: recent seaborn releases interpret the first
# positional argument as `data`, which changes how the box is drawn.
sns.boxplot(x = df_tvshows_screentimes['Screentime'], ax = ax[1])
plt.show()
In [86]:
# Overlaid screentime histograms (with KDE) for the first 100 rows of each
# platform frame.
plt.figure(figsize = (20, 10))
plt.title('Screentimes Per Platform')

platform_samples = [
    ('Prime Video', prime_video_screentimes_tvshows, 'lightblue'),
    ('Netflix', netflix_screentimes_tvshows, 'red'),
    ('Hulu', hulu_screentimes_tvshows, 'lightgreen'),
    ('Disney+', disney_screentimes_tvshows, 'darkblue'),
]

# Label each histogram where it is drawn. Passing a bare list to plt.legend()
# assigns names to whatever handles come first on the axes, which can be the
# first few bars of a single histogram rather than one entry per platform.
for platform, frame, colour in platform_samples:
    sns.histplot(frame['Screentime'][:100], color = colour, label = platform, kde = True)

plt.legend()
plt.show()
In [87]:
def round_val(data):
    """Round a numeric value to the nearest integer, passing missing values through.

    Parameters
    ----------
    data : number or None
        Value to round. ``None`` and NaN (anything whose ``str()`` is ``'nan'``)
        are treated as missing.

    Returns
    -------
    int or None
        ``round(data)`` for a real number, ``None`` when the value is missing.
    """
    # Guard against None as well as NaN: round(None) raises TypeError.
    if data is None or str(data) == 'nan':
        return None
    return round(data)
In [88]:
# Work on an independent copy so adding the group column leaves the source frame untouched.
df_tvshows_screentimes_group = df_tvshows_screentimes.copy(deep = True)
In [89]:
# Bucket each title by its screentime rounded to a whole number
# (round_val returns None for NaN, so missing screentimes stay missing).
df_tvshows_screentimes_group['Screentime Group'] = df_tvshows_screentimes['Screentime'].apply(round_val)

# Titles per group, ordered from the largest group label down to the smallest.
screentime_group_counts_desc = (
    df_tvshows_screentimes_group['Screentime Group']
    .value_counts()
    .sort_index(ascending = False)
)
screentimes_values = screentime_group_counts_desc.tolist()
screentimes_index = screentime_group_counts_desc.index
In [90]:
# One row per screentime group: number of titles plus the sum of each platform flag.
platform_columns = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
grouped_by_screentime = df_tvshows_screentimes_group.groupby('Screentime Group')

screentimes_group_count = grouped_by_screentime['Title'].count()
screentimes_group_tvshows = grouped_by_screentime[platform_columns].sum()

# Combine both aggregates side by side and order the groups by size, largest first.
screentimes_group_data_tvshows = (
    pd.concat([screentimes_group_count, screentimes_group_tvshows], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'TV Shows Count'})
    .sort_values(by = 'TV Shows Count', ascending = False)
)
In [91]:
# Screentime groups with their title counts, all platforms combined (largest group first).
screentimes_group_data_tvshows.sort_values(
    by = 'TV Shows Count',
    ascending = False,
)
Out[91]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
1 1 1902 769 588 652 35
0 0 1776 568 664 583 118
2 2 144 55 21 71 5
3 3 50 14 7 32 0
4 4 44 16 6 24 0
6 6 38 16 6 16 1
5 5 30 11 3 17 0
7 7 10 4 3 3 0
8 8 9 3 5 2 0
9 9 5 1 1 2 1
10 10 5 2 1 2 0
11 11 3 1 0 2 0
12 12 1 0 0 1 0
13 13 1 0 0 1 0
14 14 1 1 0 0 0
15 16 1 1 0 0 0
16 19 1 0 0 1 0
17 21 1 0 0 1 0
In [92]:
# Same table ordered by the group label itself, longest screentime group first.
screentimes_group_data_tvshows.sort_values(
    by = 'Screentime Group',
    ascending = False,
)
Out[92]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
17 21 1 0 0 1 0
16 19 1 0 0 1 0
15 16 1 1 0 0 0
14 14 1 1 0 0 0
13 13 1 0 0 1 0
12 12 1 0 0 1 0
11 11 3 1 0 2 0
10 10 5 2 1 2 0
9 9 5 1 1 2 1
8 8 9 3 5 2 0
7 7 10 4 3 3 0
6 6 38 16 6 16 1
5 5 30 11 3 17 0
4 4 44 16 6 24 0
3 3 50 14 7 32 0
2 2 144 55 21 71 5
1 1 1902 769 588 652 35
0 0 1776 568 664 583 118
In [93]:
# Title count per screentime group across all platforms (group on x, count on y).
group_labels = screentimes_group_data_tvshows['Screentime Group']

fig = px.bar(
    x = group_labels,
    y = screentimes_group_data_tvshows['TV Shows Count'],
    color = group_labels,
    color_continuous_scale = 'Teal_r',
    # Series are passed without a data_frame, so plotly names them 'x' / 'y'.
    labels = {'x' : 'Screentime : In Hours', 'y' : 'TV Shows Count'},
    title = 'TV Shows with Group Screentime in Hours : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [94]:
# Share of titles per screentime group, limited to the first 10 rows of the
# group table (it is sorted by 'TV Shows Count' descending where it is built).
top_screentime_groups = screentimes_group_data_tvshows[:10]

# Refer to columns by name so they are resolved against the sliced frame.
# Passing the full-length Series instead made plotly ignore the [:10] slice
# and draw every group.
fig = px.pie(top_screentime_groups,
             names = 'Screentime Group',
             values = 'TV Shows Count',
             color = 'TV Shows Count',
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'TV Shows Count based on Screentime Group')
fig.show()
In [95]:
# Screentime groups ranked by title count, most populated first, indexed from 0
# so that row 0 is the group with the most titles.
df_screentimes_group_high_tvshows = (
    screentimes_group_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = False)
    .reset_index(drop = True)
)

df_screentimes_group_high_tvshows.head(5)
Out[95]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 1902 769 588 652 35
1 0 1776 568 664 583 118
2 2 144 55 21 71 5
3 3 50 14 7 32 0
4 4 44 16 6 24 0
In [96]:
# Screentime groups ranked by title count, least populated first, indexed from 0
# so that row 0 is a group with the fewest titles.
df_screentimes_group_low_tvshows = (
    screentimes_group_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = True)
    .reset_index(drop = True)
)

df_screentimes_group_low_tvshows.head(5)
Out[96]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 21 1 0 0 1 0
1 16 1 1 0 0 0
2 14 1 1 0 0 0
3 13 1 0 0 1 0
4 12 1 0 0 1 0
In [97]:
# Overview of the screentime groups: number of titles with a screentime value,
# number of distinct groups, the group labels ordered by title count, and the
# groups with the highest / lowest title count (row 0 of the high / low rankings).
# NOTE(review): when several groups tie on the lowest count, only whichever one
# the sort placed first is reported.
print(f'''
      Total '{df_tvshows_screentimes['Screentime'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see TV Shows from Total '{screentimes_group_data_tvshows['Screentime Group'].unique().shape[0]}' Screentime Group, They were Like this, \n
 
      {screentimes_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Screentime Group'].unique()} etc. \n
 
      The Screentime Group with Highest TV Shows Count have '{screentimes_group_data_tvshows['TV Shows Count'].max()}' TV Shows Available is '{df_screentimes_group_high_tvshows['Screentime Group'][0]}', &\n
      The Screentime Group with Lowest TV Shows Count have '{screentimes_group_data_tvshows['TV Shows Count'].min()}' TV Shows Available is '{df_screentimes_group_low_tvshows['Screentime Group'][0]}'
      ''')
      Total '4022' Titles are available on All Platforms, out of which

      You Can Choose to see TV Shows from Total '18' Screentime Group, They were Like this, 

 
      [ 1  0  2  3  4  6  5  7  8  9 10 11 12 13 14 16 19 21] etc. 

 
      The Screentime Group with Highest TV Shows Count have '1902' TV Shows Available is '1', &

      The Screentime Group with Lowest TV Shows Count have '1' TV Shows Available is '21'
      
In [98]:
# Groups that contain at least one Netflix title, largest first, reduced to
# the group label and the Netflix count.
netflix_screentimes_group_tvshows = (
    screentimes_group_data_tvshows[screentimes_group_data_tvshows['Netflix'] != 0]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(['Hulu', 'Prime Video', 'Disney+', 'TV Shows Count'], axis = 1)
)

# Full group table ranked by Netflix count: descending ("high") and ascending ("low").
netflix_screentimes_group_high_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
)

netflix_screentimes_group_low_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Netflix', ascending = True)
    .reset_index(drop = True)
)

netflix_screentimes_group_high_tvshows.head(5)
Out[98]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 1902 769 588 652 35
1 0 1776 568 664 583 118
2 2 144 55 21 71 5
3 4 44 16 6 24 0
4 6 38 16 6 16 1
In [99]:
# Groups that contain at least one Hulu title, largest first, reduced to
# the group label and the Hulu count.
hulu_screentimes_group_tvshows = (
    screentimes_group_data_tvshows[screentimes_group_data_tvshows['Hulu'] != 0]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(['Netflix', 'Prime Video', 'Disney+', 'TV Shows Count'], axis = 1)
)

# Full group table ranked by Hulu count: descending ("high") and ascending ("low").
hulu_screentimes_group_high_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
)

hulu_screentimes_group_low_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Hulu', ascending = True)
    .reset_index(drop = True)
)

hulu_screentimes_group_high_tvshows.head(5)
Out[99]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 0 1776 568 664 583 118
1 1 1902 769 588 652 35
2 2 144 55 21 71 5
3 3 50 14 7 32 0
4 4 44 16 6 24 0
In [100]:
# Groups that contain at least one Prime Video title, largest first, reduced to
# the group label and the Prime Video count.
prime_video_screentimes_group_tvshows = (
    screentimes_group_data_tvshows[screentimes_group_data_tvshows['Prime Video'] != 0]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(['Netflix', 'Hulu', 'Disney+', 'TV Shows Count'], axis = 1)
)

# Full group table ranked by Prime Video count: descending ("high") and ascending ("low").
prime_video_screentimes_group_high_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
)

prime_video_screentimes_group_low_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Prime Video', ascending = True)
    .reset_index(drop = True)
)

prime_video_screentimes_group_high_tvshows.head(5)
Out[100]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 1902 769 588 652 35
1 0 1776 568 664 583 118
2 2 144 55 21 71 5
3 3 50 14 7 32 0
4 4 44 16 6 24 0
In [101]:
# Groups that contain at least one Disney+ title, largest first, reduced to
# the group label and the Disney+ count.
disney_screentimes_group_tvshows = (
    screentimes_group_data_tvshows[screentimes_group_data_tvshows['Disney+'] != 0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(['Netflix', 'Hulu', 'Prime Video', 'TV Shows Count'], axis = 1)
)

# Full group table ranked by Disney+ count: descending ("high") and ascending ("low").
disney_screentimes_group_high_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
)

disney_screentimes_group_low_tvshows = (
    df_screentimes_group_high_tvshows
    .sort_values(by = 'Disney+', ascending = True)
    .reset_index(drop = True)
)

disney_screentimes_group_high_tvshows.head(5)
Out[101]:
Screentime Group TV Shows Count Netflix Hulu Prime Video Disney+
0 0 1776 568 664 583 118
1 1 1902 769 588 652 35
2 2 144 55 21 71 5
3 6 38 16 6 16 1
4 9 5 1 1 2 1
In [102]:
# Text summary of the most / least populated screentime group, overall and per
# platform. Each line pairs the group in row 0 of a ranked frame with the
# max()/min() of the matching count column.
# NOTE(review): the per-platform "low" frames are the full group table sorted
# ascending, so groups with zero titles on a platform are included; the reported
# "lowest" is then a zero-count group, and which of several tied groups lands in
# row 0 depends on the sort. Confirm whether zero-count groups should be excluded.
print(f'''
      The Screentime Group with Highest TV Shows Count Ever Got is '{df_screentimes_group_high_tvshows['Screentime Group'][0]}' : '{df_screentimes_group_high_tvshows['TV Shows Count'].max()}'\n
      The Screentime Group with Lowest TV Shows Count Ever Got is '{df_screentimes_group_low_tvshows['Screentime Group'][0]}' : '{df_screentimes_group_low_tvshows['TV Shows Count'].min()}'\n
      
      The Screentime Group with Highest TV Shows Count on 'Netflix' is '{netflix_screentimes_group_high_tvshows['Screentime Group'][0]}' : '{netflix_screentimes_group_high_tvshows['Netflix'].max()}'\n
      The Screentime Group with Lowest TV Shows Count on 'Netflix' is '{netflix_screentimes_group_low_tvshows['Screentime Group'][0]}' : '{netflix_screentimes_group_low_tvshows['Netflix'].min()}'\n
      
      The Screentime Group with Highest TV Shows Count on 'Hulu' is '{hulu_screentimes_group_high_tvshows['Screentime Group'][0]}' : '{hulu_screentimes_group_high_tvshows['Hulu'].max()}'\n
      The Screentime Group with Lowest TV Shows Count on 'Hulu' is '{hulu_screentimes_group_low_tvshows['Screentime Group'][0]}' : '{hulu_screentimes_group_low_tvshows['Hulu'].min()}'\n
      
      The Screentime Group with Highest TV Shows Count on 'Prime Video' is '{prime_video_screentimes_group_high_tvshows['Screentime Group'][0]}' : '{prime_video_screentimes_group_high_tvshows['Prime Video'].max()}'\n
      The Screentime Group with Lowest TV Shows Count on 'Prime Video' is '{prime_video_screentimes_group_low_tvshows['Screentime Group'][0]}' : '{prime_video_screentimes_group_low_tvshows['Prime Video'].min()}'\n
      
      The Screentime Group with Highest TV Shows Count on 'Disney+' is '{disney_screentimes_group_high_tvshows['Screentime Group'][0]}' : '{disney_screentimes_group_high_tvshows['Disney+'].max()}'\n
      The Screentime Group with Lowest TV Shows Count on 'Disney+' is '{disney_screentimes_group_low_tvshows['Screentime Group'][0]}' : '{disney_screentimes_group_low_tvshows['Disney+'].min()}'\n 
      ''')
      The Screentime Group with Highest TV Shows Count Ever Got is '1' : '1902'

      The Screentime Group with Lowest TV Shows Count Ever Got is '21' : '1'

      
      The Screentime Group with Highest TV Shows Count on 'Netflix' is '1' : '769'

      The Screentime Group with Lowest TV Shows Count on 'Netflix' is '21' : '0'

      
      The Screentime Group with Highest TV Shows Count on 'Hulu' is '0' : '664'

      The Screentime Group with Lowest TV Shows Count on 'Hulu' is '21' : '0'

      
      The Screentime Group with Highest TV Shows Count on 'Prime Video' is '1' : '652'

      The Screentime Group with Lowest TV Shows Count on 'Prime Video' is '16' : '0'

      
      The Screentime Group with Highest TV Shows Count on 'Disney+' is '0' : '118'

      The Screentime Group with Lowest TV Shows Count on 'Disney+' is '8' : '0'
 
      
In [103]:
# 2x2 grid of bar plots: for each platform, the first 10 rows of its own
# group table (sorted by that platform's title count, largest first).
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
platform_group_frames = [
    netflix_screentimes_group_tvshows,
    hulu_screentimes_group_tvshows,
    prime_video_screentimes_group_tvshows,
    disney_screentimes_group_tvshows,
]
platform_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# axes.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
n_scr_ax1, h_scr_ax2, p_scr_ax3, d_scr_ax4 = [
    sns.barplot(x = frame['Screentime Group'][:10], y = frame[platform][:10], palette = palette, ax = axis)
    for frame, platform, palette, axis in zip(platform_group_frames, labels, platform_palettes, axes.flat)
]

for axis, label in zip([n_scr_ax1, h_scr_ax2, p_scr_ax3, d_scr_ax4], labels):
    axis.title.set_text(label)

plt.show()
In [104]:
# Title count per screentime group, one line per platform on shared axes.
plt.figure(figsize = (20, 5))

platform_line_colours = {
    'Netflix' : 'red',
    'Hulu' : 'lightgreen',
    'Prime Video' : 'lightblue',
    'Disney+' : 'darkblue',
}

# Label every line: without a legend the four coloured lines cannot be told apart.
for platform, colour in platform_line_colours.items():
    sns.lineplot(x = screentimes_group_data_tvshows['Screentime Group'],
                 y = screentimes_group_data_tvshows[platform],
                 color = colour,
                 label = platform)

plt.xlabel('Screentime Group', fontsize = 15)
plt.ylabel('TV Shows Count', fontsize = 15)
plt.legend(title = 'Platform')
plt.show()
In [105]:
# Number of distinct screentime groups overall and per platform. The per-platform
# frames only keep groups whose count for that platform is non-zero, so each
# figure is the number of groups in which the platform has at least one title.
print(f'''
      Accross All Platforms Total Count of Screentime Group is '{screentimes_group_data_tvshows['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Netflix' is '{netflix_screentimes_group_tvshows['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Hulu' is '{hulu_screentimes_group_tvshows['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Prime Video' is '{prime_video_screentimes_group_tvshows['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Disney+' is '{disney_screentimes_group_tvshows['Screentime Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Screentime Group is '18'

      Total Count of Screentime Group on 'Netflix' is '14'

      Total Count of Screentime Group on 'Hulu' is '11'

      Total Count of Screentime Group on 'Prime Video' is '16'

      Total Count of Screentime Group on 'Disney+' is '5'
 
      
In [106]:
# 2x2 grid of line plots, one panel per platform, drawn from the combined group table.
# NOTE(review): the platform count is on x and the screentime group on y here,
# the reverse of the combined line chart -- confirm this orientation is intended.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
platform_line_colours = ['red', 'lightgreen', 'lightblue', 'darkblue']

# axes.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
n_scr_ax1, h_scr_ax2, p_scr_ax3, d_scr_ax4 = [
    sns.lineplot(y = screentimes_group_data_tvshows['Screentime Group'],
                 x = screentimes_group_data_tvshows[platform],
                 color = colour,
                 ax = axis)
    for platform, colour, axis in zip(labels, platform_line_colours, axes.flat)
]

for axis, label in zip([n_scr_ax1, h_scr_ax2, p_scr_ax3, d_scr_ax4], labels):
    axis.title.set_text(label)

plt.show()
In [107]:
# 2x2 grid of bar plots: per-platform title counts for the first 10 rows of the
# combined group table (the same 10 groups in every panel).
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
platform_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# axes.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4 = [
    sns.barplot(x = screentimes_group_data_tvshows['Screentime Group'][:10],
                y = screentimes_group_data_tvshows[platform][:10],
                palette = palette,
                ax = axis)
    for platform, palette, axis in zip(labels, platform_palettes, axes.flat)
]

for axis, label in zip([n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4], labels):
    axis.title.set_text(label)

plt.show()